{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "import json\n",
    "import hail as hl\n",
    "\n",
    "# Start Hail with GCS requester-pays mode set to AUTO and broad-ctsa as the requester-pays project id\n",
    "requester_pays_conf = {\n",
    "    \"spark.hadoop.fs.gs.requester.pays.mode\": \"AUTO\",\n",
    "    \"spark.hadoop.fs.gs.requester.pays.project.id\": \"broad-ctsa\",\n",
    "}\n",
    "hl.init(spark_conf=requester_pays_conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "### GTEx v8 eQTL tissue-specific all SNP gene associations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "Files in `gs://gtex-resources/GTEx_Analysis_v8_QTLs/GTEx_Analysis_v8_eQTL_all_associations/` were gzipped, so we need to get them bgzipped and moved over to `gs://hail-datasets-tmp`. First, I generated a text file of the input paths and a text file of the desired output paths."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Generate list of all eQTL all association files in gs://gtex-resources.\n",
    "# check=True makes a failed gsutil call raise here instead of silently writing empty path files.\n",
    "list_eqtl_files_gz = subprocess.run([\"gsutil\",\n",
    "                                     \"-u\",\n",
    "                                     \"broad-ctsa\",\n",
    "                                     \"ls\",\n",
    "                                     \"gs://gtex-resources/GTEx_Analysis_v8_QTLs/GTEx_Analysis_v8_eQTL_all_associations/\"],\n",
    "                                    stdout=subprocess.PIPE,\n",
    "                                    check=True)\n",
    "eqtl_files_gz = list_eqtl_files_gz.stdout.decode('utf-8').split()\n",
    "\n",
    "# Write eQTL file paths to text for input\n",
    "with open(\"gtex_eQTL_paths_in.txt\", \"w\") as f:\n",
    "    for eqtl_file in eqtl_files_gz:\n",
    "        f.write(f\"{eqtl_file}\\n\")\n",
    "\n",
    "# Change bucket to \"gs://hail-datasets-tmp\" and filename extension to \".bgz\" and write to another text file for output\n",
    "with open(\"gtex_eQTL_paths_out.txt\", \"w\") as f:\n",
    "    for eqtl_file in eqtl_files_gz:\n",
    "        eqtl_file_out = eqtl_file.replace(\"gs://gtex-resources\", \"gs://hail-datasets-tmp\").replace(\".gz\", \".bgz\")\n",
    "        f.write(f\"{eqtl_file_out}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After generating the text files as above, I ran the following to get the files bgzipped so we can read them in and create Hail Tables.\n",
    "\n",
    "```\n",
    "paste gtex_eQTL_paths_in.txt gtex_eQTL_paths_out.txt |\n",
    "while read infile outfile;\n",
    "do\n",
    "  gsutil -u broad-ctsa cat $infile |\n",
    "  gzip -d |\n",
    "  bgzip -c |\n",
    "  gsutil cp - $outfile\n",
    "done\n",
    "```\n",
    "\n",
    "Now we can generate the Hail Tables (do this on a cluster):"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create GTEx v8 eQTL Hail Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Read the list of .bgz file paths in gs://hail-datasets-tmp from gtex_eQTL_paths_out.txt\n",
    "with open(\"gtex_eQTL_paths_out.txt\") as f:\n",
    "    eqtl_files = f.read().splitlines()\n",
    "\n",
    "# Same for every tissue file, so set once outside the loop\n",
    "version = \"v8\"\n",
    "build = \"GRCh38\"\n",
    "\n",
    "for eqtl_file in eqtl_files:\n",
    "    print(eqtl_file)\n",
    "    ht = hl.import_table(eqtl_file,\n",
    "                         force_bgz=True,\n",
    "                         types={\"gene_id\": hl.tstr,\n",
    "                                \"variant_id\": hl.tstr,\n",
    "                                \"tss_distance\": hl.tint32,\n",
    "                                \"ma_samples\": hl.tint32,\n",
    "                                \"ma_count\": hl.tint32,\n",
    "                                \"maf\": hl.tfloat64,\n",
    "                                \"pval_nominal\": hl.tfloat64,\n",
    "                                \"slope\": hl.tfloat64,\n",
    "                                \"slope_se\": hl.tfloat64})\n",
    "\n",
    "    # Table name: the path text before its first \".\", reduced to the last \"/\" component\n",
    "    name = \"GTEx_eQTL_allpairs_\" + eqtl_file.split(\".\")[0].split(\"/\")[-1]\n",
    "\n",
    "    # The first four \"_\"-separated fields of variant_id supply the contig, position and two alleles\n",
    "    variant_fields = ht.variant_id.split(\"_\")\n",
    "    ht2 = ht.annotate(locus=hl.locus(variant_fields[0],\n",
    "                                     hl.int(variant_fields[1]),\n",
    "                                     reference_genome=build),\n",
    "                      alleles=[variant_fields[2], variant_fields[3]])\n",
    "    ht2 = ht2.select(\"locus\", \"alleles\", \"gene_id\", \"variant_id\", \"tss_distance\",\n",
    "                     \"ma_samples\", \"ma_count\", \"maf\", \"pval_nominal\", \"slope\", \"slope_se\")\n",
    "    ht2 = ht2.key_by(\"locus\", \"alleles\")\n",
    "\n",
    "    n_rows = ht2.count()\n",
    "    n_partitions = ht2.n_partitions()\n",
    "\n",
    "    ht2 = ht2.annotate_globals(metadata=hl.struct(name=name,\n",
    "                                                  version=version,\n",
    "                                                  reference_genome=build,\n",
    "                                                  n_rows=n_rows,\n",
    "                                                  n_partitions=n_partitions))\n",
    "\n",
    "    for region in [\"us\"]:\n",
    "        output_file = f\"gs://hail-datasets-{region}/{name}_{version}_{build}.ht\"\n",
    "        # overwrite=False: do not replace a table that already exists at this path\n",
    "        ht2.write(output_file, overwrite=False)\n",
    "\n",
    "    print(f\"Wrote {name} to Hail Table.\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Add entries for eQTL Hail Tables to config"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can create entries in `datasets.json` for the new tables:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Open our datasets config file so we can add our new entries\n",
    "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n",
    "with open(datasets_path, \"r\") as f:\n",
    "    datasets = json.load(f)\n",
    "\n",
    "# Get list of GTEx eQTL tables in hail-datasets-us.\n",
    "# check=True makes a failed gsutil call raise before datasets.json is rewritten below.\n",
    "list_datasets = subprocess.run([\"gsutil\", \"-u\", \"broad-ctsa\", \"ls\", \"gs://hail-datasets-us\"],\n",
    "                               stdout=subprocess.PIPE,\n",
    "                               check=True)\n",
    "all_datasets = list_datasets.stdout.decode('utf-8').split()\n",
    "tables = [x.strip(\"/\") for x in all_datasets if \"GTEx_eQTL_allpairs_\" in x]\n",
    "\n",
    "for table in tables:\n",
    "    # Derive the EU and AWS URLs from the US GCS URL by swapping the scheme and bucket names\n",
    "    gs_us_url = table\n",
    "    gs_eu_url = table.replace(\"hail-datasets-us\", \"hail-datasets-eu\")\n",
    "    aws_url = table.replace(\"gs\", \"s3\", 1).replace(\"hail-datasets-us\", \"hail-datasets-us-east-1\")\n",
    "\n",
    "    full_table_name = table.split(\"/\")[-1]\n",
    "\n",
    "    # Table names end in \"_{version}_{build}.ht\"; what is left after removing the prefix is the tissue\n",
    "    build = full_table_name.split(\"_\")[-1].replace(\".ht\", \"\")\n",
    "    version = full_table_name.split(\"_\")[-2]\n",
    "    tissue_name = full_table_name.replace(\"GTEx_eQTL_allpairs_\", \"\").replace(f\"_{version}_{build}.ht\", \"\")\n",
    "\n",
    "    json_entry = {\n",
    "        \"annotation_db\": {\n",
    "            \"key_properties\": []\n",
    "        },\n",
    "        \"description\": f\"GTEx: {tissue_name} eQTL tissue-specific all SNP gene \"\n",
    "                       \"associations Hail Table. All variant-gene cis-eQTL associations \"\n",
    "                       \"tested in each tissue (including non-significant associations).\",\n",
    "        \"url\": \"https://gtexportal.org/home/datasets\",\n",
    "        \"versions\": [\n",
    "            {\n",
    "                \"reference_genome\": build,\n",
    "                \"url\": {\n",
    "                    \"aws\": {\n",
    "                        \"us\": aws_url\n",
    "                    },\n",
    "                    \"gcp\": {\n",
    "                        \"us\": gs_us_url,\n",
    "                        \"eu\": gs_eu_url\n",
    "                    }\n",
    "                },\n",
    "                \"version\": version\n",
    "            }\n",
    "        ]\n",
    "    }\n",
    "    datasets[f\"GTEx_eQTL_allpairs_{tissue_name}\"] = json_entry\n",
    "\n",
    "# Write new entries back to datasets.json config:\n",
    "with open(datasets_path, \"w\") as f:\n",
    "    json.dump(datasets, f, sort_keys=True, ensure_ascii=False, indent=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create schemas for docs for eQTL Hail Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import textwrap\n",
    "\n",
    "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n",
    "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n",
    "with open(datasets_path, \"r\") as f:\n",
    "    datasets = json.load(f)\n",
    "\n",
    "# Write one .rst schema page per GTEx eQTL entry in datasets.json\n",
    "names = [name for name in datasets if \"GTEx_eQTL_allpairs_\" in name]\n",
    "for name in names:\n",
    "    versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n",
    "    if not versions:\n",
    "        versions = [None]\n",
    "    reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n",
    "    if not reference_genomes:\n",
    "        reference_genomes = [None]\n",
    "\n",
    "    print(name)\n",
    "    print(versions[0])\n",
    "    # Format rather than concatenate so the None fallback above does not raise a TypeError\n",
    "    print(f\"{reference_genomes[0]}\\n\")\n",
    "\n",
    "    # The schema is taken from the GCP US copy of the first (sorted) version / reference genome\n",
    "    path = [dataset[\"url\"][\"gcp\"][\"us\"]\n",
    "            for dataset in datasets[name][\"versions\"]\n",
    "            if dataset[\"version\"] == versions[0]\n",
    "            and dataset[\"reference_genome\"] == reference_genomes[0]]\n",
    "    assert len(path) == 1\n",
    "    path = path[0]\n",
    "\n",
    "    table = hl.methods.read_table(path)\n",
    "    # Capture describe() output as a string and drop trailing whitespace from each line\n",
    "    description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n",
    "    description = \"\\n\".join([line.rstrip() for line in description])\n",
    "\n",
    "    if path.endswith(\".ht\"):\n",
    "        table_class = \"hail.Table\"\n",
    "    else:\n",
    "        table_class = \"hail.MatrixTable\"\n",
    "\n",
    "    template = \"\"\".. _{dataset}:\n",
    "\n",
    "{dataset}\n",
    "{underline1}\n",
    "\n",
    "*  **Versions:** {versions}\n",
    "*  **Reference genome builds:** {ref_genomes}\n",
    "*  **Type:** :class:`{class}`\n",
    "\n",
    "Schema ({version0}, {ref_genome0})\n",
    "{underline2}\n",
    "\n",
    ".. code-block:: text\n",
    "\n",
    "{schema}\n",
    "\n",
    "\"\"\"\n",
    "    context = {\n",
    "        \"dataset\": name,\n",
    "        \"underline1\": len(name) * \"=\",\n",
    "        \"version0\": versions[0],\n",
    "        \"ref_genome0\": reference_genomes[0],\n",
    "        \"versions\": \", \".join([str(version) for version in versions]),\n",
    "        \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n",
    "        # Underline must be as long as the rendered \"Schema (version, build)\" heading\n",
    "        \"underline2\": len(\"\".join([\"Schema (\", str(versions[0]), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n",
    "        \"schema\": textwrap.indent(description, \"    \"),\n",
    "        \"class\": table_class\n",
    "    }\n",
    "    with open(os.path.join(output_dir, f\"{name}.rst\"), \"w\") as f:\n",
    "        f.write(template.format(**context).strip())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "source": [
    "### GTEx v8 sQTL tissue-specific all SNP gene associations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate list of all sQTL all association files in gs://gtex-resources.\n",
    "# check=True makes a failed gsutil call raise here instead of silently writing empty path files.\n",
    "list_sqtl_files_gz = subprocess.run([\"gsutil\",\n",
    "                                     \"-u\",\n",
    "                                     \"broad-ctsa\",\n",
    "                                     \"ls\",\n",
    "                                     \"gs://gtex-resources/GTEx_Analysis_v8_QTLs/GTEx_Analysis_v8_sQTL_all_associations/\"],\n",
    "                                    stdout=subprocess.PIPE,\n",
    "                                    check=True)\n",
    "sqtl_files_gz = list_sqtl_files_gz.stdout.decode('utf-8').split()\n",
    "\n",
    "# Write sQTL file paths to text for input\n",
    "with open(\"gtex_sQTL_paths_in.txt\", \"w\") as f:\n",
    "    for sqtl_file in sqtl_files_gz:\n",
    "        f.write(f\"{sqtl_file}\\n\")\n",
    "\n",
    "# Change bucket to \"gs://hail-datasets-tmp\" and filename extension to \".bgz\" and write to another text file for output\n",
    "with open(\"gtex_sQTL_paths_out.txt\", \"w\") as f:\n",
    "    for sqtl_file in sqtl_files_gz:\n",
    "        sqtl_file_out = sqtl_file.replace(\"gs://gtex-resources\", \"gs://hail-datasets-tmp\").replace(\".gz\", \".bgz\")\n",
    "        f.write(f\"{sqtl_file_out}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The files were converted from .gz to .bgz in the same way as the eQTL files above:\n",
    "```\n",
    "paste gtex_sQTL_paths_in.txt gtex_sQTL_paths_out.txt |\n",
    "while read infile outfile;\n",
    "do\n",
    "  gsutil -u broad-ctsa cat $infile |\n",
    "  gzip -d |\n",
    "  bgzip -c |\n",
    "  gsutil cp - $outfile\n",
    "done\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create GTEx v8 sQTL Hail Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the list of .bgz file paths in gs://hail-datasets-tmp from gtex_sQTL_paths_out.txt\n",
    "with open(\"gtex_sQTL_paths_out.txt\") as f:\n",
    "    sqtl_files = f.read().splitlines()\n",
    "\n",
    "# Same for every tissue file, so set once outside the loop\n",
    "version = \"v8\"\n",
    "build = \"GRCh38\"\n",
    "\n",
    "for sqtl_file in sqtl_files:\n",
    "    print(sqtl_file)\n",
    "    ht = hl.import_table(sqtl_file,\n",
    "                         force_bgz=True,\n",
    "                         types={\"phenotype_id\": hl.tstr,\n",
    "                                \"variant_id\": hl.tstr,\n",
    "                                \"tss_distance\": hl.tint32,\n",
    "                                \"ma_samples\": hl.tint32,\n",
    "                                \"ma_count\": hl.tint32,\n",
    "                                \"maf\": hl.tfloat64,\n",
    "                                \"pval_nominal\": hl.tfloat64,\n",
    "                                \"slope\": hl.tfloat64,\n",
    "                                \"slope_se\": hl.tfloat64})\n",
    "\n",
    "    # Table name: the path text before its first \".\", reduced to the last \"/\" component\n",
    "    name = \"GTEx_sQTL_allpairs_\" + sqtl_file.split(\".\")[0].split(\"/\")[-1]\n",
    "\n",
    "    # phenotype_id is \":\"-separated: the first three fields give the intron interval and the\n",
    "    # last two give the cluster and gene_id. The first four \"_\"-separated fields of variant_id\n",
    "    # supply the contig, position and two alleles.\n",
    "    phenotype_fields = ht.phenotype_id.split(\":\")\n",
    "    variant_fields = ht.variant_id.split(\"_\")\n",
    "    # The intron interval uses `build`, like the locus, so both stay on the same reference genome\n",
    "    ht2 = ht.annotate(intron=hl.locus_interval(phenotype_fields[0],\n",
    "                                               hl.int32(phenotype_fields[1]),\n",
    "                                               hl.int32(phenotype_fields[2]),\n",
    "                                               reference_genome=build),\n",
    "                      cluster=phenotype_fields[-2],\n",
    "                      gene_id=phenotype_fields[-1],\n",
    "                      locus=hl.locus(variant_fields[0],\n",
    "                                     hl.int(variant_fields[1]),\n",
    "                                     reference_genome=build),\n",
    "                      alleles=[variant_fields[2], variant_fields[3]])\n",
    "    # Replace the raw phenotype_id string with a struct of its parsed parts\n",
    "    ht2 = ht2.annotate(phenotype_id=hl.struct(intron=ht2.intron,\n",
    "                                              cluster=ht2.cluster,\n",
    "                                              gene_id=ht2.gene_id))\n",
    "    ht2 = ht2.select(\"locus\", \"alleles\", \"phenotype_id\", \"tss_distance\",\n",
    "                     \"ma_samples\", \"ma_count\", \"maf\", \"pval_nominal\", \"slope\", \"slope_se\")\n",
    "\n",
    "    n_rows = ht2.count()\n",
    "    n_partitions = ht2.n_partitions()\n",
    "\n",
    "    ht2 = ht2.annotate_globals(metadata=hl.struct(name=name,\n",
    "                                                  version=version,\n",
    "                                                  reference_genome=build,\n",
    "                                                  n_rows=n_rows,\n",
    "                                                  n_partitions=n_partitions))\n",
    "    ht2 = ht2.key_by(\"locus\", \"alleles\")\n",
    "\n",
    "    for region in [\"us\"]:\n",
    "        output_file = f\"gs://hail-datasets-{region}/{name}_{version}_{build}.ht\"\n",
    "        # overwrite=False: do not replace a table that already exists at this path\n",
    "        ht2.write(output_file, overwrite=False)\n",
    "\n",
    "    print(f\"Wrote {name} to Hail Table.\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Add entries for sQTL Hail Tables to config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open our datasets config file so we can add our new entries\n",
    "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n",
    "with open(datasets_path, \"r\") as f:\n",
    "    datasets = json.load(f)\n",
    "\n",
    "# Get list of GTEx sQTL tables in hail-datasets-us.\n",
    "# check=True makes a failed gsutil call raise before datasets.json is rewritten below.\n",
    "list_datasets = subprocess.run([\"gsutil\", \"-u\", \"broad-ctsa\", \"ls\", \"gs://hail-datasets-us\"],\n",
    "                               stdout=subprocess.PIPE,\n",
    "                               check=True)\n",
    "all_datasets = list_datasets.stdout.decode('utf-8').split()\n",
    "tables = [x.strip(\"/\") for x in all_datasets if \"GTEx_sQTL_allpairs_\" in x]\n",
    "\n",
    "for table in tables:\n",
    "    # Derive the EU and AWS URLs from the US GCS URL by swapping the scheme and bucket names\n",
    "    gs_us_url = table\n",
    "    gs_eu_url = table.replace(\"hail-datasets-us\", \"hail-datasets-eu\")\n",
    "    aws_url = table.replace(\"gs\", \"s3\", 1).replace(\"hail-datasets-us\", \"hail-datasets-us-east-1\")\n",
    "\n",
    "    full_table_name = table.split(\"/\")[-1]\n",
    "\n",
    "    # Table names end in \"_{version}_{build}.ht\"; what is left after removing the prefix is the tissue\n",
    "    build = full_table_name.split(\"_\")[-1].replace(\".ht\", \"\")\n",
    "    version = full_table_name.split(\"_\")[-2]\n",
    "    tissue_name = full_table_name.replace(\"GTEx_sQTL_allpairs_\", \"\").replace(f\"_{version}_{build}.ht\", \"\")\n",
    "\n",
    "    json_entry = {\n",
    "        \"annotation_db\": {\n",
    "            \"key_properties\": []\n",
    "        },\n",
    "        \"description\": f\"GTEx: {tissue_name} sQTL tissue-specific all SNP gene \"\n",
    "                       \"associations Hail Table. All variant-gene cis-sQTL associations \"\n",
    "                       \"tested in each tissue (including non-significant associations).\",\n",
    "        \"url\": \"https://gtexportal.org/home/datasets\",\n",
    "        \"versions\": [\n",
    "            {\n",
    "                \"reference_genome\": build,\n",
    "                \"url\": {\n",
    "                    \"aws\": {\n",
    "                        \"us\": aws_url\n",
    "                    },\n",
    "                    \"gcp\": {\n",
    "                        \"us\": gs_us_url,\n",
    "                        \"eu\": gs_eu_url\n",
    "                    }\n",
    "                },\n",
    "                \"version\": version\n",
    "            }\n",
    "        ]\n",
    "    }\n",
    "    datasets[f\"GTEx_sQTL_allpairs_{tissue_name}\"] = json_entry\n",
    "\n",
    "# Write new entries back to datasets.json config:\n",
    "with open(datasets_path, \"w\") as f:\n",
    "    json.dump(datasets, f, sort_keys=True, ensure_ascii=False, indent=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create schemas for docs for sQTL Hail Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import textwrap\n",
    "\n",
    "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n",
    "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n",
    "with open(datasets_path, \"r\") as f:\n",
    "    datasets = json.load(f)\n",
    "\n",
    "# Write one .rst schema page per GTEx sQTL entry in datasets.json\n",
    "names = [name for name in datasets if \"GTEx_sQTL_allpairs_\" in name]\n",
    "for name in names:\n",
    "    versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n",
    "    if not versions:\n",
    "        versions = [None]\n",
    "    reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n",
    "    if not reference_genomes:\n",
    "        reference_genomes = [None]\n",
    "\n",
    "    print(name)\n",
    "    print(versions[0])\n",
    "    # Format rather than concatenate so the None fallback above does not raise a TypeError\n",
    "    print(f\"{reference_genomes[0]}\\n\")\n",
    "\n",
    "    # The schema is taken from the GCP US copy of the first (sorted) version / reference genome\n",
    "    path = [dataset[\"url\"][\"gcp\"][\"us\"]\n",
    "            for dataset in datasets[name][\"versions\"]\n",
    "            if dataset[\"version\"] == versions[0]\n",
    "            and dataset[\"reference_genome\"] == reference_genomes[0]]\n",
    "    assert len(path) == 1\n",
    "    path = path[0]\n",
    "\n",
    "    table = hl.methods.read_table(path)\n",
    "    # Capture describe() output as a string and drop trailing whitespace from each line\n",
    "    description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n",
    "    description = \"\\n\".join([line.rstrip() for line in description])\n",
    "\n",
    "    if path.endswith(\".ht\"):\n",
    "        table_class = \"hail.Table\"\n",
    "    else:\n",
    "        table_class = \"hail.MatrixTable\"\n",
    "\n",
    "    template = \"\"\".. _{dataset}:\n",
    "\n",
    "{dataset}\n",
    "{underline1}\n",
    "\n",
    "*  **Versions:** {versions}\n",
    "*  **Reference genome builds:** {ref_genomes}\n",
    "*  **Type:** :class:`{class}`\n",
    "\n",
    "Schema ({version0}, {ref_genome0})\n",
    "{underline2}\n",
    "\n",
    ".. code-block:: text\n",
    "\n",
    "{schema}\n",
    "\n",
    "\"\"\"\n",
    "    context = {\n",
    "        \"dataset\": name,\n",
    "        \"underline1\": len(name) * \"=\",\n",
    "        \"version0\": versions[0],\n",
    "        \"ref_genome0\": reference_genomes[0],\n",
    "        \"versions\": \", \".join([str(version) for version in versions]),\n",
    "        \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n",
    "        # Underline must be as long as the rendered \"Schema (version, build)\" heading\n",
    "        \"underline2\": len(\"\".join([\"Schema (\", str(versions[0]), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n",
    "        \"schema\": textwrap.indent(description, \"    \"),\n",
    "        \"class\": table_class\n",
    "    }\n",
    "    with open(os.path.join(output_dir, f\"{name}.rst\"), \"w\") as f:\n",
    "        f.write(template.format(**context).strip())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
